# Creates output to be passed along to rest of model
library(pacman)
p_load(
  here, data.table, fst, purrr, lubridate, dplyr, stringr
)

# First the model coefficients table
good_model_dt = 
  read.fst(
    path = here("Data/electricity-generation/plant-model-fit/good-model-dt.fst"),
    as.data.table = TRUE
  )[,.(
    orispl_code = plant_id_eia, 
    intercept,
    excess_load_cal,
    excess_load_cal_sq = excess_load_sq_cal,
    excess_load_mro,
    excess_load_mro_sq = excess_load_sq_mro,
    excess_load_npcc,
    excess_load_npcc_sq = excess_load_sq_npcc,
    excess_load_rfc,
    excess_load_rfc_sq = excess_load_sq_rfc,
    excess_load_serc,
    excess_load_serc_sq = excess_load_sq_serc,
    excess_load_tre,
    excess_load_tre_sq = excess_load_sq_tre,
    excess_load_wecc,
    excess_load_wecc_sq = excess_load_sq_wecc,
    log_scale, 
    est_extremum_cal,
    est_extremum_mro,
    est_extremum_npcc,
    est_extremum_rfc,
    est_extremum_serc,
    est_extremum_tre,
    est_extremum_wecc,
    loglik, loglik_df
  )] |>
  setkey(orispl_code) 
# Writing the results as a csv
fwrite(
  x = good_model_dt,
  file = here("Data/electricity-generation/output/PPRegressors-oge.csv")
)


# Unit information table 
oge_plant_info_dt =
  rbind(
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    )[, 
      .SD[which.min(year)], 
      by = .(plant_id_eia)
    ],
    read.fst(
      path = here("Data/electricity-generation/shaped-info-dt-oge.fst"),
      as.data.table = TRUE
    )[, 
      .SD[which.min(year)], 
      by = .(plant_id_eia)
    ],
    fill = TRUE
  )[plant_id_eia %in% good_model_dt$orispl_code] |>
  setkey(plant_id_eia)

fwrite(
  x = oge_plant_info_dt[,.(orispl_code = plant_id_eia, year, namepcap)],
  file = here("Data/electricity-generation/output/PPCap-oge.csv")
)
# Stack heights
fwrite(
  x = oge_plant_info_dt[,.(orispl_code = plant_id_eia, stack_height)],
  file = here("Data/electricity-generation/output/StackHeight-oge.csv")
)

# Load table 
nerc_load_dt_raw = 
  read.fst(
    path = here("Data/electricity-generation/clean-load/nerc-load-dt-oge.fst"),
    as.data.table = TRUE
  ) |>
  setkey(nerc_adj, datetime_utc)

# Reading the electricity generation files (with fitted values)
elec_gen_dt = 
  read.fst(
    path = here("Data/electricity-generation/plant-model-fit/elec-gen-fit-dt.fst"),
    as.data.table = TRUE
  ) |>
  merge(
    oge_plant_info_dt,
    by = c('plant_id_eia')
  )

# Summarizing production by region
nerc_gen_dt = 
  elec_gen_dt[,.(
    tot_net_generation_mwh = sum(net_generation_mwh),
    tot_fit_net_generation_mwh = sum(fit_net_generation_mwh)), 
    keyby = .(nerc_adj, datetime_utc)
  ]

# Adding gen data to load data
nerc_load_dt = 
  merge(
    nerc_gen_dt,
    nerc_load_dt_raw, 
    by = c("nerc_adj","datetime_utc")
  )
# Renewables 
fwrite(
  x = nerc_load_dt[,.(
    nerc_adj, utc_time = datetime_utc, 
    load_nd, 
    load_solar, 
    load_wind = load_nd - load_solar,
    load_hydro
  )],
  file = here("Data/electricity-generation/output/RegRenew-oge.csv")
)
# Total Load
fwrite(
  x = nerc_load_dt[,.(
    nerc_adj, utc_time = datetime_utc, 
    excess_load, 
    excess_load_adj = excess_load - load_ff + tot_net_generation_mwh
  )],
  file = here("Data/electricity-generation/output/RegLoad-oge.csv")
)



# Now loading coefficients for own region only 
own_region_model_dt = 
  read.fst(
    path = here("Data/electricity-generation/plant-model-fit/own-region-model-dt.fst"),
    as.data.table = TRUE
  )[,.(
    orispl_code = plant_id_eia, 
    intercept,
    excess_load_cal,
    excess_load_cal_sq = excess_load_sq_cal,
    excess_load_mro,
    excess_load_mro_sq = excess_load_sq_mro,
    excess_load_npcc,
    excess_load_npcc_sq = excess_load_sq_npcc,
    excess_load_rfc,
    excess_load_rfc_sq = excess_load_sq_rfc,
    excess_load_serc,
    excess_load_serc_sq = excess_load_sq_serc,
    excess_load_tre,
    excess_load_tre_sq = excess_load_sq_tre,
    excess_load_wecc,
    excess_load_wecc_sq = excess_load_sq_wecc,
    log_scale, 
    est_extremum_cal,
    est_extremum_mro,
    est_extremum_npcc,
    est_extremum_rfc,
    est_extremum_serc,
    est_extremum_tre,
    est_extremum_wecc,
    loglik, loglik_df
  )] |>
  setkey(orispl_code) 

# Writing the results as a csv
fwrite(
  x = own_region_model_dt,
  file = here("Data/electricity-generation/output/PPRegressors-own-region-oge.csv")
)



# Calculating correlation between regions 
p_load(corrplot)
nerc_load_dt_raw[year(datetime_utc) == 2019, .(nerc_adj, datetime_utc, excess_load)] |>
  dcast(datetime_utc ~ nerc_adj, value.var ='excess_load') %>% 
  .[,-'datetime_utc'] |> 
  cor() |>
  corrplot(
    type = 'upper',tl.col = "black", tl.srt = 45
  )



